# -*- coding: utf-8 -*-
import scrapy,json
from Recruit.items import ZhiLianItem

class ZhilianSpider(scrapy.Spider):
    """Spider for zhaopin.com job search results.

    Crawls paginated JSON search results from the fe-api endpoint
    (keyword "UI", city 530), then follows each result's ``positionURL``
    to scrape the job-detail page into a ``ZhiLianItem``.
    """

    name = 'zhilian'
    allowed_domains = ['zhaopin.com']

    # Search-API URL template; ``start`` is the 0-based result offset.
    # The ``kw=`` query parameter selects the search keyword (here: UI).

    def start_requests(self):
        """Yield requests for the first 30 result pages (60 results each)."""
        base_url = 'https://fe-api.zhaopin.com/c/i/sou?start={}&pageSize=60&cityId=530&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1&kw=UI&kt=3&lastUrlQuery=%7B%22p%22:2,%22pageSize%22:%2260%22,%22jl%22:%22530%22,%22kw%22:%22UI%22,%22kt%22:%223%22%7D&_v=0.94749110'
        for page in range(30):
            url = base_url.format(page * 60)
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Parse one JSON search-result page and follow each job's detail URL."""
        res_dict = json.loads(response.text)
        if 'data' not in res_dict:
            return
        # Each entry in data.results describes one job posting.
        for result in res_dict['data']['results']:
            url = result.get('positionURL')
            if not url:
                continue
            yield scrapy.Request(url=url, callback=self.parse_detail)

    def parse_detail(self, response):
        """Extract job fields from a detail page into a ``ZhiLianItem``.

        Pages with a different layout (e.g. campus-recruitment postings)
        lack these selectors; those are logged and skipped rather than
        crashing the crawl.
        """
        item = ZhiLianItem()
        item['title'] = response.xpath('//h1/text()').extract_first()
        item['salary'] = response.xpath('//strong/text()').extract_first()
        item['company'] = response.xpath('//li[@class="clearfix"]//a/text()').extract_first()
        item['addr'] = response.xpath('//p[@class="add-txt"]/text()').extract_first()
        item['experience'] = response.xpath('//div[@class="info-three l"]/span[2]/text()').extract_first()
        item['Education'] = response.xpath('//div[@class="info-three l"]/span[3]/text()').extract_first()
        item['this_url'] = response.url

        # The company name node is missing on non-standard (campus
        # recruitment) pages; skip those instead of yielding junk.
        raw_name = response.xpath('//p[@class="title"]/text()').extract_first()
        if raw_name is None:
            self.logger.info('Skipping non-standard detail page (campus recruitment?): %s', response.url)
            return
        item['name'] = raw_name.split(' ')[0]

        # Join all text fragments of the description block into one string.
        item['desc'] = ''.join(response.xpath('//div[@class="pos-ul"]//text()').extract())
        yield item



